Load libraries

knitr::opts_chunk$set(echo = TRUE)

list.of.packages <- c("gsheet", "tidyverse", "janitor", "plotly", "glmmTMB", "metafor", "broom.mixed", "car") #add new libraries here 

new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]
if(length(new.packages)) install.packages(new.packages)

# Load all libraries 
lapply(list.of.packages, FUN = function(X) {
  do.call("require", list(X)) 
})
sessionInfo()

Prepare data

NOTE: data is read directly from the GoogleSheet using a share link that was set to “anyone with a link can view”

# Read in data from GoogleSheet 
data.fert <- as_tibble(gsheet2tbl('https://docs.google.com/spreadsheets/d/111SuH548Et6HDckjbQjtYk-B8A5VfNkUYZgcUNRPxxY/edit?usp=sharing'))
## Warning: Missing column names filled in: 'X22' [22]
#replace NR (not reported) with NA and convert columns to factor / numeric where needed 
data.fert <- data.fert %>% 
  na_if("NR") %>%     
  mutate_at(c('Phylum', 'Study', 'Taxonomic Group', 'Common name', 'Latin name', 'Error statistic'), as.factor) %>%
  mutate_at(c('pH Experimental', 'pH Control', 'pCO2 Experimental', 'pCO2 Control', 'Ave. Fert. % @ pH', 'Error % @ pH', '# Trials @ pH', 'Insemination time'), as.numeric) %>%
  clean_names()  # fill in spaces with underscores for column names 
## Warning: NAs introduced by coercion
## Warning: NAs introduced by coercion
#data.fert %>% select(study, ave_fert_percent_p_h, error_percent_p_h, error_statistic, number_trials_p_h) %>% View()

Explore data with figures

ggplotly(
data.fert %>%
ggplot(mapping=aes(x=p_h_experimental, y=ave_fert_percent_p_h, group=taxonomic_group, col=taxonomic_group, text=`common_name`)) + 
  geom_point(size=1.5, width=0.02) +
  facet_wrap(~phylum, scale="free") +
  geom_smooth(method="lm", se=TRUE, aes(fill=taxonomic_group)) +
  ggtitle("Fertilization Rate ~ pH exposure by phylum, linear") +
  theme_minimal() 
)
## Warning: Ignoring unknown parameters: width
## Warning: Removed 25 rows containing non-finite values (stat_smooth).
#ggplotly(
data.fert %>%
ggplot(mapping=aes(x=p_h_experimental, y=ave_fert_percent_p_h, group=taxonomic_group, col=taxonomic_group, text=`common_name`)) + 
  geom_point(size=1, width=0.02) +
  facet_wrap(~phylum, scale="free") +
  geom_smooth(method="lm", se=TRUE, formula=y ~ poly(x, 2, raw=TRUE), aes(fill=taxonomic_group)) +
  ggtitle("Fertilization Rate ~ pH exposure by phylum, polynomial") +
  theme_minimal()
## Warning: Ignoring unknown parameters: width

## Warning: Removed 25 rows containing non-finite values (stat_smooth).
## Warning in predict.lm(model, newdata = new_data_frame(list(x = xseq)), se.fit =
## se, : prediction from a rank-deficient fit may be misleading
## Warning: Removed 25 rows containing missing values (geom_point).

#)

Convert all error values to separate SE & SD columns

95% Confidence Interval

Upper 95% CI = Mean + 1.96*SE; I recorded the difference between the upper 95%CI and the mean (Upper 95%CI - Mean). To convert I will use:
SE = (Upper 95%CI - Mean) / 1.96

SD = ((Upper 95%CI - Mean) / 1.96) * sqrt(n)

Standard Deviation

SE= SD/sqrt(n); where n=sample size. To convert I will use that equation (since I recorded sample size, i.e. number of trials at each pH)

SD = SE*sqrt(n)

Cases where I do not know the type of error statistic reported

I will use that statistic as-is (no conversion), thereby assuming it is SE.

data.fert <- data.fert %>% 
  mutate(SE =  case_when(error_statistic == "SD" ~ error_percent_p_h/sqrt(number_trials_p_h), 
         error_statistic == "95% CI" ~ error_percent_p_h/1.96,
         is.na(error_statistic) ~ error_percent_p_h, 
         error_statistic == "SE" ~ error_percent_p_h)) 

data.fert <- data.fert %>% 
  mutate(SD =  case_when(error_statistic == "SE" ~ error_percent_p_h*sqrt(number_trials_p_h), 
         error_statistic == "95% CI" ~ (error_percent_p_h/1.96)*sqrt(number_trials_p_h),
         is.na(error_statistic) ~ error_percent_p_h, 
         error_statistic == "SD" ~ error_percent_p_h)) 

Calculate pH experimental - pH control

data.fert <- data.fert %>% 
  mutate(pH_delta = p_h_control-p_h_experimental)

Convert % fertilization data to proportion fertilized, and replace any values >1 (aka 100% fertilized) with 1

data.fert <- data.fert %>% 
  mutate(ave_fert_proport = case_when(ave_fert_percent_p_h <= 100 ~ ave_fert_percent_p_h/100,
                                      ave_fert_percent_p_h > 100 ~ 1))

Transform proportion fertilized data to remove 1’s

Transormation equation source: https://cran.r-project.org/web/packages/betareg/vignettes/betareg.pdf, " … if y also assumes the extremes 0 and 1, a useful transformation in practice is (y · (n − 1) + 0.5)/n where n is the sample size (Smithson and Verkuilen 2006)."

HOWEVER

issue is that a few studies only had 1 trial per pH, so the transformation results in NA values

 # data.fert$ave_fert_proport.t <- data.fert$ave_fert_proport*((data.fert$number_trials_p_h-1) + 0.5) / data.fert$number_trials_p_h

Instead, I simply subtracted 0.001 from all data (but not sure if I will use that transformed data)

data.fert$ave_fert_proport.t <- data.fert$ave_fert_proport - 0.001

Inspect data

data.fert$ave_fert_proport.t %>% hist()

ggplot(data.fert, aes(group=phylum, col=phylum)) + geom_density(aes(ave_fert_proport.t))
## Warning: Removed 13 rows containing non-finite values (stat_density).

# How many studies per ~phylum? 
data.fert %>%
  select(phylum, study) %>%
  distinct(phylum, study) %>%
  group_by(phylum) %>% count()
## # A tibble: 4 x 2
## # Groups:   phylum [4]
##   phylum            n
##   <fct>         <int>
## 1 Cnidaria          4
## 2 Crustacea         5
## 3 Echinodermata    12
## 4 Mollusca         18
# How many studies per taxonomic group? 
data.fert %>%
  select(phylum, study, taxonomic_group) %>%
  distinct(phylum, study, taxonomic_group) %>%
  group_by(taxonomic_group) %>% count()
## # A tibble: 10 x 2
## # Groups:   taxonomic_group [10]
##    taxonomic_group     n
##    <fct>           <int>
##  1 abalone             2
##  2 clam                5
##  3 copepod             3
##  4 coral               4
##  5 mussel              4
##  6 non-copepod         2
##  7 non-urchin          2
##  8 oyster              5
##  9 scallop             3
## 10 sea urchin         11

Calculate weights for models

NOTE: not working due to missing data.

weights <- metafor::escalc(measure='MN',
                mi=data.fert$ave_fert_percent_p_h,
                sdi = data.fert$SD,
                ni=data.fert$number_trials_p_h, options(na.action="na.pass"))  
data.fert$w <-weights$vi

Generate binomial models

test1 <- glmmTMB(ave_fert_proport ~ p_h_experimental + taxonomic_group/phylum + (1|study), data=drop_na(data.fert, w), binomial(link = "logit"), na.action=na.exclude, weights = w)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
## Warning in fitTMB(TMBStruc): Model convergence problem; non-positive-definite
## Hessian matrix. See vignette('troubleshooting')
test2 <- glmmTMB(ave_fert_proport ~ p_h_experimental*phylum + (1|study), data=drop_na(data.fert, w), binomial(link = "logit"), na.action=na.exclude, weights = w)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
test3 <- glmmTMB(ave_fert_proport ~ p_h_experimental:phylum + (1|study), data=drop_na(data.fert, w), binomial(link = "logit"), na.action=na.exclude, weights = w)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
test4 <- glmmTMB(ave_fert_proport ~ p_h_experimental + phylum + (1|study), data=drop_na(data.fert, w), binomial(link = "logit"), na.action=na.exclude, weights = w)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
test5 <- glmmTMB(ave_fert_proport ~ p_h_experimental + (1|study), data=drop_na(data.fert, w), binomial(link = "logit"), na.action=na.exclude, weights = w)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
test6 <- glmmTMB(ave_fert_proport ~ 1 + (1|study), data=drop_na(data.fert, w), binomial(link = "logit"), na.action=na.exclude, weights = w)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
test7 <- glmmTMB(ave_fert_proport ~ phylum + (1|study), data=drop_na(data.fert, w), binomial(link = "logit"), na.action=na.exclude, weights = w)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
test8 <- glmmTMB(ave_fert_proport ~ p_h_experimental, data=drop_na(data.fert, w), binomial(link = "logit"), na.action=na.exclude, weights = w)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!

Determine best fit model

AIC(test1, test2, test3, test4, test5, test6, test7, test8) #test2 smallest AIC.  
## Warning in AIC.default(test1, test2, test3, test4, test5, test6, test7, : models
## are not all fitted to the same number of observations
##       df      AIC
## test1 42       NA
## test2  9 1169.716
## test3  6 1208.641
## test4  6 1206.182
## test5  3 1210.887
## test6  2 1450.084
## test7  5 1440.438
## test8  2 3511.136
#test differene between models 
anova(test3, test2) 
## Data: drop_na(data.fert, w)
## Models:
## test3: ave_fert_proport ~ p_h_experimental:phylum + (1 | study), zi=~0, disp=~1
## test2: ave_fert_proport ~ p_h_experimental * phylum + (1 | study), zi=~0, disp=~1
##       Df    AIC    BIC  logLik deviance  Chisq Chi Df Pr(>Chisq)    
## test3  6 1208.6 1230.7 -598.32   1196.6                             
## test2  9 1169.7 1202.9 -575.86   1151.7 44.924      3  9.601e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Examine model test2
car::Anova(test2) #phylum, pH, & phylum:pH sign. factors 
## Analysis of Deviance Table (Type II Wald chisquare tests)
## 
## Response: ave_fert_proport
##                           Chisq Df Pr(>Chisq)    
## p_h_experimental        156.796  1  < 2.2e-16 ***
## phylum                   10.563  3    0.01434 *  
## p_h_experimental:phylum  42.497  3  3.147e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(test2)
##  Family: binomial  ( logit )
## Formula:          ave_fert_proport ~ p_h_experimental * phylum + (1 | study)
## Data: drop_na(data.fert, w)
## Weights: w
## 
##      AIC      BIC   logLik deviance df.resid 
##   1169.7   1202.9   -575.9   1151.7      285 
## 
## Random effects:
## 
## Conditional model:
##  Groups Name        Variance Std.Dev.
##  study  (Intercept) 0.9459   0.9726  
## Number of obs: 294, groups:  study, 31
## 
## Conditional model:
##                                      Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                            6.1350     5.3425   1.148   0.2508    
## p_h_experimental                      -0.7656     0.6731  -1.137   0.2554    
## phylumCrustacea                      -12.5491     5.5114  -2.277   0.0228 *  
## phylumEchinodermata                  -22.6771     5.5012  -4.122 3.75e-05 ***
## phylumMollusca                       -13.7549     6.0574  -2.271   0.0232 *  
## p_h_experimental:phylumCrustacea       1.6380     0.6958   2.354   0.0186 *  
## p_h_experimental:phylumEchinodermata   2.9631     0.6942   4.268 1.97e-05 ***
## p_h_experimental:phylumMollusca        1.8986     0.7692   2.468   0.0136 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Generate estimates & confidence intervals (log likelihood)
confint(test2)
##                                                 2.5 %      97.5 %    Estimate
## cond.(Intercept)                           -4.3361108  16.6060479   6.1349685
## cond.p_h_experimental                      -2.0847698   0.5536213  -0.7655743
## cond.phylumCrustacea                      -23.3512502  -1.7470097 -12.5491300
## cond.phylumEchinodermata                  -33.4593535 -11.8949260 -22.6771397
## cond.phylumMollusca                       -25.6272057  -1.8825189 -13.7548623
## cond.p_h_experimental:phylumCrustacea       0.2741953   3.0017240   1.6379596
## cond.p_h_experimental:phylumEchinodermata   1.6025270   4.3237081   2.9631176
## cond.p_h_experimental:phylumMollusca        0.3910182   3.4062259   1.8986221
## study.cond.Std.Dev.(Intercept)              0.7333418   1.2899079   0.9725962
# Instpect residuals ~ fitted values 
aa5 <- augment(test2, data=drop_na(data.fert, w))
## Warning in mr - fitted(object): longer object length is not a multiple of
## shorter object length
#ggplotly(
ggplot(aa5, aes(x=.fitted,y=.resid)) + 
    geom_point() + 
  geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 12 rows containing non-finite values (stat_smooth).
## Warning: Removed 12 rows containing missing values (geom_point).

#)

Generate model predictions and plot against real data

ph.min.max <- drop_na(data.fert, w) %>% 
  select(phylum, p_h_experimental) %>% 
  group_by(phylum) %>% 
  summarize(min=min(p_h_experimental, na.rm=TRUE), max=max(p_h_experimental, na.rm=TRUE))
  
phylum.list <- list()
for (i in 1:nrow(ph.min.max)) {
  phylum.list[[i]] <- data.frame(ph=c(seq(from=as.numeric(ph.min.max[i,"min"]), 
                            to=as.numeric(ph.min.max[i,"max"]), 
                            by=0.01)),
                   phylum=rep(c(ph.min.max[i,"phylum"])))
}
new.data <- bind_rows(phylum.list) %>% purrr::set_names(c("p_h_experimental", "phylum"))
new.data$study <- NA 
new.data$w <- NA 

predict.test.df <- predict(test2, newdata = new.data, se.fit = TRUE, type="response")
predict.test.df.df <- predict.test.df %>%
  as.data.frame() %>%
  cbind(new.data)

#scales::show_col(c("#e41a1c","#4daf4a","#ff7f00","#984ea3",'#377eb8'))

Figure

# Data with beta regression model fit 
#ggplotly(
ggplot() + 
  geom_jitter(data=drop_na(data.fert, w), aes(x=p_h_experimental, y=ave_fert_proport, group=phylum, col=phylum), size=1.2, width=0.03) +
  facet_wrap(~phylum, scales="free") + theme_minimal() +
  ggtitle("% fertilization ~ pH with binomial-regression model predictions") + 
  xlab("Experimental pH") + ylab("Fertilization %") +
  scale_color_manual(name=NULL, values=c("#e41a1c","#ff7f00","#4daf4a",'#377eb8')) +
  theme_minimal() + 
  coord_cartesian(ylim = c(0, 1), xlim =c(6,8.5)) +
  geom_line(data = predict.test.df.df, aes(x=p_h_experimental, y=fit), col="gray50") + #, col=phylum
  geom_ribbon(data = predict.test.df.df, aes(x=p_h_experimental, ymin=fit-se.fit, ymax=fit+se.fit, fill=phylum), linetype=2, alpha=0.1, col="gray50") #)
## Warning: Removed 12 rows containing missing values (geom_point).

Figure caption:

Fertilization success (%) by experimental pH across marine taxa examined in this review. Meta-analysis was performed using a binomial regression model, and indicates that fertilization success decreases with pH across Cnidaria (4 studies), Crustacea (5 studies), Echinodermata (12 studies), and Mollusca (18 studies). Each point reflects the average % fertilization reported by one study at an experimental pH.

Run GLMs on each phylum

  1. Are slopes significantly different from zero?
  2. If so, what is the equation?

Cnidaria

model.cnidaria <- glmmTMB(ave_fert_proport ~ p_h_experimental + (1|study), data=data.fert %>% drop_na(w) %>% filter(phylum=="Cnidaria"), binomial(link = "logit"), na.action=na.exclude, weights = w)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
Anova(model.cnidaria)
## Analysis of Deviance Table (Type II Wald chisquare tests)
## 
## Response: ave_fert_proport
##                   Chisq Df Pr(>Chisq)
## p_h_experimental 0.7736  1     0.3791
summary(model.cnidaria) 
##  Family: binomial  ( logit )
## Formula:          ave_fert_proport ~ p_h_experimental + (1 | study)
## Data: data.fert %>% drop_na(w) %>% filter(phylum == "Cnidaria")
## Weights: w
## 
##      AIC      BIC   logLik deviance df.resid 
##    155.8    161.5    -74.9    149.8       46 
## 
## Random effects:
## 
## Conditional model:
##  Groups Name        Variance Std.Dev.
##  study  (Intercept) 0.009966 0.09983 
## Number of obs: 49, groups:  study, 3
## 
## Conditional model:
##                  Estimate Std. Error z value Pr(>|z|)
## (Intercept)        4.4307     5.2035   0.852    0.394
## p_h_experimental  -0.5806     0.6601  -0.880    0.379
predict.cnidaria <- predict(model.cnidaria, newdata = subset(new.data, phylum=="Cnidaria"), se.fit = TRUE, type="response")
predict.cnidaria.df <- predict.cnidaria %>%
  as.data.frame() %>%
  cbind(subset(new.data, phylum=="Cnidaria"))

ggplot() + 
  geom_jitter(data=data.fert %>% drop_na(w) %>% filter(phylum=="Cnidaria"), aes(x=p_h_experimental, y=ave_fert_proport), size=1.2, width=0.03, color="#e41a1c") +
  ggtitle("Cnidaria") + 
  xlab("Experimental pH") + ylab("Fertilization %") +
  theme_minimal() + 
  coord_cartesian(ylim = c(0, 1), xlim =c(6,8.5)) +
  geom_line(data = predict.cnidaria.df, aes(x=p_h_experimental, y=fit), col="#e41a1c") + 
  geom_ribbon(data = predict.cnidaria.df, aes(x=p_h_experimental, ymin=fit-se.fit, ymax=fit+se.fit), linetype=2, alpha=0.1, col="#e41a1c") 

Crustacea

model.crustacea <- glmmTMB(ave_fert_proport ~ p_h_experimental + (1|study), data=data.fert %>% drop_na(w) %>% filter(phylum=="Crustacea"), binomial(link = "logit"), na.action=na.exclude, weights=w)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
Anova(model.crustacea)
## Analysis of Deviance Table (Type II Wald chisquare tests)
## 
## Response: ave_fert_proport
##                   Chisq Df Pr(>Chisq)    
## p_h_experimental 26.088  1  3.263e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(model.crustacea) 
##  Family: binomial  ( logit )
## Formula:          ave_fert_proport ~ p_h_experimental + (1 | study)
## Data: data.fert %>% drop_na(w) %>% filter(phylum == "Crustacea")
## Weights: w
## 
##      AIC      BIC   logLik deviance df.resid 
##    119.6    123.2    -56.8    113.6       21 
## 
## Random effects:
## 
## Conditional model:
##  Groups Name        Variance Std.Dev.
##  study  (Intercept) 1.837    1.355   
## Number of obs: 24, groups:  study, 5
## 
## Conditional model:
##                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)       -7.0292     1.5082  -4.661 3.15e-06 ***
## p_h_experimental   0.9159     0.1793   5.108 3.26e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
predict.crustacea <- predict(model.crustacea, newdata = subset(new.data, phylum=="Crustacea"), se.fit = TRUE, type="response")
predict.crustacea.df <- predict.crustacea %>%
  as.data.frame() %>%
  cbind(subset(new.data, phylum=="Crustacea"))

ggplot() + 
  geom_jitter(data=data.fert %>% drop_na(w) %>% filter(phylum=="Crustacea"), aes(x=p_h_experimental, y=ave_fert_proport), size=1.2, width=0.03, col="#ff7f00") +
  ggtitle("Crustacea") + 
  xlab("Experimental pH") + ylab("Fertilization %") +
  theme_minimal() + 
  coord_cartesian(ylim = c(0, 1), xlim =c(6,8.5)) +
  geom_line(data = predict.crustacea.df, aes(x=p_h_experimental, y=fit), col="#ff7f00") +
  geom_ribbon(data = predict.crustacea.df, aes(x=p_h_experimental, ymin=fit-se.fit, ymax=fit+se.fit), linetype=2, alpha=0.1, col="#ff7f00") 

Echinodermata

model.echinodermata <- glmmTMB(ave_fert_proport ~ p_h_experimental + (1|study), data=data.fert %>% drop_na(w) %>% filter(phylum=="Echinodermata"), binomial(link = "logit"), na.action=na.exclude, weights = w)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
Anova(model.echinodermata)
## Analysis of Deviance Table (Type II Wald chisquare tests)
## 
## Response: ave_fert_proport
##                   Chisq Df Pr(>Chisq)    
## p_h_experimental 154.04  1  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(model.echinodermata) 
##  Family: binomial  ( logit )
## Formula:          ave_fert_proport ~ p_h_experimental + (1 | study)
## Data: data.fert %>% drop_na(w) %>% filter(phylum == "Echinodermata")
## Weights: w
## 
##      AIC      BIC   logLik deviance df.resid 
##    700.2    709.2   -347.1    694.2      142 
## 
## Random effects:
## 
## Conditional model:
##  Groups Name        Variance Std.Dev.
##  study  (Intercept) 0.9716   0.9857  
## Number of obs: 145, groups:  study, 11
## 
## Conditional model:
##                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)      -15.8468     1.3623  -11.63   <2e-16 ***
## p_h_experimental   2.1430     0.1727   12.41   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
predict.echinodermata <- predict(model.echinodermata, newdata = subset(new.data, phylum=="Echinodermata"), se.fit = TRUE, type="response")
predict.echinodermata.df <- predict.echinodermata %>%
  as.data.frame() %>%
  cbind(subset(new.data, phylum=="Echinodermata"))

ggplot() + 
  geom_jitter(data=data.fert %>% drop_na(w) %>% filter(phylum=="Echinodermata"), aes(x=p_h_experimental, y=ave_fert_proport), size=1.2, width=0.03, col="#4daf4a") +
  ggtitle("Echinodermata") + 
  xlab("Experimental pH") + ylab("Fertilization %") +
  theme_minimal() + 
  coord_cartesian(ylim = c(0, 1), xlim =c(6,8.5)) +
  geom_line(data = predict.echinodermata.df, aes(x=p_h_experimental, y=fit), col="#4daf4a") +
  geom_ribbon(data = predict.echinodermata.df, aes(x=p_h_experimental, ymin=fit-se.fit, ymax=fit+se.fit), linetype=2, alpha=0.1, col="#4daf4a") 

Mollusca

model.mollusca <- glmmTMB(ave_fert_proport ~ p_h_experimental + (1|study), data=data.fert %>% drop_na(w) %>% filter(phylum=="Mollusca"), binomial(link = "logit"), na.action=na.exclude, weights=w)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
Anova(model.mollusca)
## Analysis of Deviance Table (Type II Wald chisquare tests)
## 
## Response: ave_fert_proport
##                   Chisq Df Pr(>Chisq)   
## p_h_experimental 9.4414  1   0.002121 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(model.mollusca) 
##  Family: binomial  ( logit )
## Formula:          ave_fert_proport ~ p_h_experimental + (1 | study)
## Data: data.fert %>% drop_na(w) %>% filter(phylum == "Mollusca")
## Weights: w
## 
##      AIC      BIC   logLik deviance df.resid 
##    198.1    205.1    -96.1    192.1       73 
## 
## Random effects:
## 
## Conditional model:
##  Groups Name        Variance Std.Dev.
##  study  (Intercept) 0.818    0.9045  
## Number of obs: 76, groups:  study, 14
## 
## Conditional model:
##                  Estimate Std. Error z value Pr(>|z|)   
## (Intercept)        -7.563      2.811  -2.690  0.00714 **
## p_h_experimental    1.128      0.367   3.073  0.00212 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
predict.mollusca <- predict(model.mollusca, newdata = subset(new.data, phylum=="Mollusca"), se.fit = TRUE, type="response")
predict.mollusca.df <- predict.mollusca %>%
  as.data.frame() %>%
  cbind(subset(new.data, phylum=="Mollusca"))

ggplot() + 
  geom_jitter(data=data.fert %>% drop_na(w) %>% filter(phylum=="Mollusca"), aes(x=p_h_experimental, y=ave_fert_proport), size=1.2, width=0.03, col="#377eb8") +
  ggtitle("Mollusca") + 
  xlab("Experimental pH") + ylab("Fertilization %") +
  theme_minimal() + 
  coord_cartesian(ylim = c(0, 1), xlim =c(6,8.5)) +
  geom_line(data = predict.mollusca.df, aes(x=p_h_experimental, y=fit), col="#377eb8") + 
  geom_ribbon(data = predict.mollusca.df, aes(x=p_h_experimental, ymin=fit-se.fit, ymax=fit+se.fit), linetype=2, alpha=0.1, col="#377eb8") 
## Warning: Removed 12 rows containing missing values (geom_point).